function [dependent_variable,regressor_mat,country_Fixed_Effects_mat,time_Fixed_Effects_mat,linear_indices]=create_VAR_matrices_FE_removed(dependent_variable,regressor_mat,data_array_for_regression_stacked_by_variable,pos,country_indicator_names_mapping,time_fixed_effects_dummy,country_fixed_effects_dummy,drop_warning_dummy)
% function [dependent_variable,regressor_mat,country_Fixed_Effects_mat,time_Fixed_Effects_mat,linear_indices]=create_VAR_matrices_FE_removed(dependent_variable,regressor_mat,data_array_for_regression_stacked_by_variable,pos,country_indicator_names_mapping,time_fixed_effects_dummy,country_fixed_effects_dummy,drop_warning_dummy)
% Create regressor matrix by dropping all time-points that have no non-NaN observations
% We need at least one country with full set of regressors
% 
% Inputs:
%   - dependent_variable                                [T by n_countries] dependent variable matrix
%   - regressor_mat                                     [T by n_countries by nvars] regressor matrix before removing NaNs and adding fixed effects
%   - data_array_for_regression_stacked_by_variable     [T by n_countries by nvars] data matrix including fixed effects
%   - pos                                               [structure] containing variable position
%   - time_fixed_effects_dummy                          [boolean]   indicator whether time fixed effects are requested
%   - country_fixed_effects_dummy                       [boolean]   indicator whether country fixed effects are requested
%   - drop_warning_dummy                                [boolean]   
%
% Outputs:
%   - dependent_variable                                [T_non_NaN times n_countries by nvars] dependent variable matrix after removing NaNs
%   - regressor_mat                                     [T_non_NaN times n_countries by nvars] regressor matrix after removing NaNs
%   - country_Fixed_Effects_mat                         [T_non_NaN times n_countries by n_countries] country fixed effects  
%   - time_Fixed_Effects_mat                            [T_non_NaN times n_countries by T_non_NaN] time fixed effects
%   - linear_indices                                    [T_non_NaN times by 1] linear indices of the T_non_NaN times n_countries in the original
%                                                           matrix

% Copyright (C) 2019-2023 Benjamin Born, Francesco D'Ascanio, Gernot J. Mueller, Johannes Pfeifer
%
% This is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% It is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
% 
% For a copy of the GNU General Public License,
% see <http://www.gnu.org/licenses/>.

if nargin<8
    drop_warning_dummy = 1;
end

time_dim=1;
country_dim=2;
var_dim=3;

%% Stack matrices
n_dependent_variables=size(dependent_variable,3);
stacked_matrix=cat(3,dependent_variable,regressor_mat);

% prepare Netherlands dummy, break in the G series due to reclassification
Netherlands_break_dummy=zeros(size(dependent_variable,1),size(dependent_variable,2));
Netherlands_break_dummy(data_array_for_regression_stacked_by_variable(:,1,pos.timeline)>=2006,data_array_for_regression_stacked_by_variable(1,:,pos.country_ident)==15)=1;

%get NaN-entries
NaN_entries=isnan(stacked_matrix);
%find country-time observations with only incomplete set of variables
country_time_NaN_obs=any(NaN_entries,var_dim);
%find timepoints without country with  full set of variables
timepoints_without_full_set=all(country_time_NaN_obs,country_dim);
time_indices_without_only_NaN_obs=find(~all(country_time_NaN_obs,country_dim));
linear_indices.linear_indices=reshape(1:numel(stacked_matrix(:,:,1)),size(stacked_matrix,1),size(stacked_matrix,2));
linear_indices.original_dimensions=size(stacked_matrix(:,:,1));
%select unproblematic time-points
stacked_matrix_without_NaN_timepoints=stacked_matrix(~timepoints_without_full_set,:,:);
Netherlands_break_dummy=Netherlands_break_dummy(~timepoints_without_full_set,:,:);
linear_indices.linear_indices(timepoints_without_full_set,:,:)=[];

%% get countries to be dropped; NB: cannot affect time points, because only countries without full set of regressors are dropped
NaN_entries=isnan(stacked_matrix_without_NaN_timepoints);
country_time_NaN_obs=any(NaN_entries,var_dim);
country_without_full_set=all(country_time_NaN_obs,time_dim);
country_indices_without_only_NaN_obs=find(~country_without_full_set');

if any(country_without_full_set) && drop_warning_dummy
    fprintf('\n')
    country_index=find(country_without_full_set);    
    fprintf('I am dropping %s because there are only NaN observations in the sample\n',country_indicator_names_mapping{data_array_for_regression_stacked_by_variable(1,find(country_without_full_set),pos.country_ident)})
    fprintf('\n')
end

stacked_matrix_final=stacked_matrix_without_NaN_timepoints(:,country_indices_without_only_NaN_obs,:);
Netherlands_break_dummy=Netherlands_break_dummy(:,country_indices_without_only_NaN_obs,:);
linear_indices.linear_indices=linear_indices.linear_indices(:,country_indices_without_only_NaN_obs,:);

n_countries=size(dependent_variable,2);

%% add fixed effects
if country_fixed_effects_dummy
    country_FE_indices=[];
    for ii=1:length(country_indices_without_only_NaN_obs)
        country_FE_indices=[country_FE_indices, pos.(['Country_FE_' num2str(country_indices_without_only_NaN_obs(ii))])];
    end
    data_array_for_regression_country_Fixed_Effects=data_array_for_regression_stacked_by_variable(time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs,country_FE_indices);
else
    data_array_for_regression_country_Fixed_Effects=[];    
end
if time_fixed_effects_dummy
    time_FE_indices=[];
    if country_fixed_effects_dummy %drop one time FE to avoid dummy variable trap
        for ii=1:length(time_indices_without_only_NaN_obs)-1 
            time_FE_indices=[time_FE_indices, pos.(['Time_FE_' num2str(time_indices_without_only_NaN_obs(ii))])];
        end
    else
        for ii=1:length(time_indices_without_only_NaN_obs)
            time_FE_indices=[time_FE_indices, pos.(['Time_FE_' num2str(time_indices_without_only_NaN_obs(ii))])];
        end
    end
    data_array_for_regression_time_Fixed_Effects=data_array_for_regression_stacked_by_variable(time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs,time_FE_indices);
else
    data_array_for_regression_time_Fixed_Effects=[];    
end
if ~country_fixed_effects_dummy && ~time_fixed_effects_dummy %add constant to regression
    data_array_for_regression_time_Fixed_Effects=ones(size(regressor_mat,1),n_countries);
end

[T,n_countries, nvars]=size(stacked_matrix_final);

trend_mat=zeros(T*n_countries,n_countries);
for ii=1:n_countries
    trend_mat((ii-1)*T+1:ii*T,ii)=zscore(1:T);
end

stacked_matrix_final=reshape(stacked_matrix_final,[size(stacked_matrix_final,1)*size(stacked_matrix_final,2) size(stacked_matrix_final,3)]);
country_Fixed_Effects_mat=reshape(data_array_for_regression_country_Fixed_Effects,[size(data_array_for_regression_country_Fixed_Effects,1)*size(data_array_for_regression_country_Fixed_Effects,2) size(data_array_for_regression_country_Fixed_Effects,3)]);
time_Fixed_Effects_mat=reshape(data_array_for_regression_time_Fixed_Effects,[size(data_array_for_regression_time_Fixed_Effects,1)*size(data_array_for_regression_time_Fixed_Effects,2) size(data_array_for_regression_time_Fixed_Effects,3)]);
Netherlands_break_dummy=reshape(Netherlands_break_dummy,[size(Netherlands_break_dummy,1)*size(Netherlands_break_dummy,2) size(Netherlands_break_dummy,3)]);
full_data_mat=[stacked_matrix_final, country_Fixed_Effects_mat, time_Fixed_Effects_mat];
nan_indices=find(any(isnan(full_data_mat),2));
linear_indices.linear_indices=reshape(linear_indices.linear_indices,[size(linear_indices.linear_indices,1)*size(linear_indices.linear_indices,2) size(linear_indices.linear_indices,3)]);

stacked_matrix_final(nan_indices,:)=[];
country_Fixed_Effects_mat(nan_indices,:)=[];
time_Fixed_Effects_mat(nan_indices,:)=[];
trend_mat(nan_indices,:)=[];
linear_indices.linear_indices(nan_indices,:)=[];
Netherlands_break_dummy(nan_indices,:)=[];
X = [country_Fixed_Effects_mat time_Fixed_Effects_mat trend_mat Netherlands_break_dummy];
A_OLS = (X'*X)\(X'*stacked_matrix_final); % This is the matrix of regression coefficients
Y_country_specific=stacked_matrix_final - X*A_OLS;

%store result
dependent_variable=Y_country_specific(:,1:n_dependent_variables);
regressor_mat=Y_country_specific(:,n_dependent_variables+1:end);